#!/bin/sh
# Bind Mellanox mlx5en interrupt threads to different cores

# PROVIDE:      mlx5en_affinity
# REQUIRE:      FILESYSTEMS netif
# KEYWORD:      nojail

#
# Add the following lines to /etc/rc.conf.local or /etc/rc.conf
# to enable this service:
#
# mlx5en_affinity_enable (bool):   Set to NO by default.
#               Set it to YES to bind interrupt threads to different cores
# mlx5en_affinity_avoidncpu (int):  0 by default.
#	            Set to the number of CPUs to exclude from binding
#	            (the count is subtracted from hw.ncpu)
# mlx5en_affinity_firstcpu (int): 0 by default.
#               Set the first CPU id (for starting at second numa-domain)

# Pull in the rc.d framework this script relies on.
. /etc/rc.subr

# Service identity: ${name} is the base from which the rc.conf knob and
# the start handler names are derived.
name="mlx5en_affinity"
rcvar="${name}_enable"

# Only "start" has a custom implementation; it is the function
# mlx5en_affinity_start defined in this file.
start_cmd="${name}_start"

# mlx5en_affinity_start
#
# Bind the interrupt threads of every Mellanox mlx5en NIC to CPUs, one IRQ
# per CPU, in round-robin order.
#
# Reads:
#   mlx5en_affinity_avoidncpu  number of CPUs subtracted from hw.ncpu before
#                              the round-robin wraps (empty is treated as 0)
#   mlx5en_affinity_firstcpu   CPU id given to the first IRQ of each NIC
#                              (empty is treated as 0)
# Outputs:
#   One "Bind ..." line per IRQ on stdout; configuration errors on stderr.
# Returns:
#   0 when bindings were attempted or when there is nothing to do
#     (no NIC, not enough CPUs),
#   1 when the configuration is unusable (non-numeric value, every CPU
#     excluded, first CPU beyond the last CPU id).
#
# "return" is used instead of "exit" so that the function never terminates
# the shell that is running it.
mlx5en_affinity_start()
{
	local nq ncpu usable avoid first value cpu nic i irq

	# Need a mce NIC. The channel count of the first port is used as the
	# number of IRQs to bind on every card.
	# NOTE(review): assumes all ports use the same number of channels.
	nq=$(/sbin/sysctl -ni dev.mce.0.conf.channels)
	if [ -z "${nq}" ]; then
		echo "No Mellanox mlx5en NIC port detected"
		return 0
	fi

	# Validate the tunables before doing any arithmetic with them: an empty
	# or non-numeric value would otherwise abort the arithmetic expansion.
	avoid=${mlx5en_affinity_avoidncpu:-0}
	first=${mlx5en_affinity_firstcpu:-0}
	for value in "${avoid}" "${first}"; do
		case "${value}" in
		*[!0-9]*)
			echo "mlx5en_affinity: avoidncpu and firstcpu must be non-negative integers (got '${value}')" >&2
			return 1
			;;
		esac
	done

	# Needs more than 2 CPUs to be worth spreading interrupts
	ncpu=$(/sbin/sysctl -n hw.ncpu)
	if [ "${ncpu:-0}" -le 2 ]; then
		echo "Not enough CPU detected"
		return 0
	fi

	# Exclude some CPUs from binding. At least one CPU must remain,
	# otherwise the modulo below would divide by zero.
	usable=$((ncpu - avoid))
	if [ "${usable}" -lt 1 ]; then
		echo "mlx5en_affinity: avoidncpu (${avoid}) excludes all ${ncpu} CPUs" >&2
		return 1
	fi
	if [ "${first}" -ge "${ncpu}" ]; then
		echo "mlx5en_affinity: firstcpu (${first}) is beyond the last CPU id ($((ncpu - 1)))" >&2
		return 1
	fi

	# Number of NIC (one hw_mtu sysctl per mce device)
	nic=$(/sbin/sysctl dev.mce. 2>/dev/null | grep -c hw_mtu)
	if [ "${nic:-0}" -eq 0 ]; then
		echo "No Mellanox card detected"
		return 0
	fi

	# XXX Need to guess the CPU numa domain
	i=0
	while [ "${i}" -lt "${nic}" ]; do
		# Every card starts again from the first configured CPU
		cpu=${first}
		# Need to filter the first 3 IRQ that seems not related to RX queues
		# Need to filter only the used IRQ (=number of channels)
		# The device name must not be followed by another digit so that
		# mlx5_core1 does not also select mlx5_core10, mlx5_core11, ...
		for irq in $(/usr/bin/vmstat -ai |
			/usr/bin/sed -nE "/mlx5_core${i}([^[:digit:]]|\$)/s/irq([[:digit:]]+):.*/\1/p" |
			tail -n +4 | head -n "${nq}"); do
			echo "Bind mlx5_core${i} IRQ ${irq} to CPU ${cpu}"
			/usr/bin/cpuset -l "${cpu}" -x "${irq}"
			# Round-robin over the CPUs that were not excluded
			cpu=$(((cpu + 1) % usable))
		done
		i=$((i + 1))
	done

	return 0
}

# Load the rc.conf settings that belong to this service.
load_rc_config "${name}"

# Defaults for every knob the administrator left unset.
: "${mlx5en_affinity_enable=NO}"
: "${mlx5en_affinity_avoidncpu=0}"
: "${mlx5en_affinity_firstcpu=0}"

# Hand the requested action (first script argument) to the rc framework.
run_rc_command "${1}"

